library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.3
## ── Attaching packages ───────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.1     ✓ purrr   0.3.4
## ✓ tibble  3.0.1     ✓ dplyr   1.0.0
## ✓ tidyr   1.1.0     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## Warning: package 'purrr' was built under R version 4.0.3
## Warning: package 'stringr' was built under R version 4.0.3
## ── Conflicts ──────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(MOFA2)
## 
## Attaching package: 'MOFA2'
## The following object is masked from 'package:stats':
## 
##     predict
library(here)
## here() starts at /home/rstudio/promise
#library(MOFAdata)
library(ReactomePA)
## 
## ReactomePA v1.34.0  For help: https://guangchuangyu.github.io/ReactomePA
## 
## If you use ReactomePA in published research, please cite:
## Guangchuang Yu, Qing-Yu He. ReactomePA: an R/Bioconductor package for reactome pathway analysis and visualization. Molecular BioSystems 2016, 12(2):477-479
library(biomaRt)
library(cowplot)
## 
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
##   default ggplot2 theme anymore. To recover the previous
##   behavior, execute:
##   theme_set(theme_cowplot())
## ********************************************************
library(enrichplot)
library(dplyr)
library(clusterProfiler)
## clusterProfiler v3.18.1  For help: https://guangchuangyu.github.io/software/clusterProfiler
## 
## If you use clusterProfiler in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Yanyan Han, Qing-Yu He. clusterProfiler: an R package for comparing biological themes among gene clusters. OMICS: A Journal of Integrative Biology. 2012, 16(5):284-287.
## 
## Attaching package: 'clusterProfiler'
## The following object is masked from 'package:biomaRt':
## 
##     select
## The following object is masked from 'package:purrr':
## 
##     simplify
## The following object is masked from 'package:stats':
## 
##     filter
# input
model <- load_model(here("models/mofa/model.hdf5"))

# gene expression data
promise_long_filtered_top <- readRDS(here::here('data/processed/expression/promise_expr_filtered_tidy_top.rds'))

# organoid morphology
umap_df <- readRDS(here::here("data/processed/PhenotypeSpectrum/umap_absolute_all_drugs_sampled.Rds"))

# organoid size
organoid_size_fit <- readRDS(here::here("data/processed/morphology/organoid_size.Rds")) %>% 
  filter(!line %in% c('D055T01', 'D020T02', 'D021T01')) %>% 
  #filter(!line %in% c('D055T01','D020T02')) %>% 
  mutate(line = as.character(line)) %>% 
  dplyr::select(line, size = x, rep = replicate) %>% 
  distinct() %>% arrange(line) %>%
  mutate(line = substr(line, 1, 4)) %>% 
  mutate(rep = paste0("r", rep))

# morphology classification
organoid_morphology <- read_delim(here::here("references/imaging/visual_classification_organoids.csv"), ";", escape_double = FALSE, trim_ws = TRUE) %>% 
  dplyr::select(line = organoid, morphology = visual_inspection_v2) %>%
  mutate(line = substr(line, 1, 4)) %>% 
  mutate(morphology = if_else(is.na(morphology), "other", morphology))
## Parsed with column specification:
## cols(
##   organoid = col_character(),
##   visual_inspection_morphology_2017 = col_character(),
##   visual_class_2_2017 = col_double(),
##   visual_inspection_v2 = col_character(),
##   visual_inspection_size_2017 = col_character(),
##   visual_class_1_2017 = col_double(),
##   visual_size_ranking_2018 = col_double(),
##   visual_cystic_ranking_2018 = col_double(),
##   clustering_jan = col_character()
## )
# drug activity data
data('aucroc', package = 'SCOPEAnalysis')
drug_activity <- aucroc %>% filter(!line %in% c('D055T01', 'D021T01', 'D054T01'))

# gene expression annotation 
intestinal_sig <- readxl::read_excel(here('data/external/expression/merloz-suarez_sigantures.xls'), 
                                     sheet = 1, skip = 4) %>% .[,1:4] %>%
  gather(signature, symbol) %>% drop_na() %>% 
  mutate(symbol = gsub('\\*', '', symbol))
## New names:
## * `` -> ...5
cris_sig <- readxl::read_excel(here('data/external/expression/cris/41467_2017_BFncomms15107_MOESM422_ESM.xlsx'), sheet = 1, skip = 2) %>% 
  dplyr::rename(symbol = `Gene ID`, signature = `CRIS Class`)

# organoid mutation
mofa_genetics <- read_delim(here::here("data/processed/mutation/Table-S2_Mutations_PDOs_RevisionV4.csv"), delim = ";") %>% 
    janitor::clean_names() %>% 
    mutate(sample = substr(sample, 2,nchar(sample)-1)) %>% 
    mutate(sample = paste0("D", sample)) %>% 
    dplyr::filter(!sample %in% c("D021", "D015")) %>%
    expand_grid(replicate = c("r1", "r2")) %>% 
    mutate(sample = paste0(sample, "_", replicate)) %>% 
    dplyr::select(sample, feature = symbol, everything()) %>% dplyr::select(sample, feature) %>% 
    mutate(value = 1) %>%
    complete(sample, feature, fill = list(value = 0)) %>% 
    distinct(sample, feature, value) %>%
    mutate(view = "mutation")
## Parsed with column specification:
## cols(
##   SAMPLE = col_character(),
##   SYMBOL = col_character(),
##   Protein_position = col_character(),
##   Amino_acids = col_character(),
##   Consequence = col_character()
## )
weights <- model@expectations$Z$single_group %>% as.data.frame() %>% rownames_to_column("id") %>% as_tibble() %>% janitor::clean_names()
loadings_size <- model@expectations$W$size_view %>% as.data.frame() %>% rownames_to_column("id") %>% as_tibble() %>% janitor::clean_names()
loadings_expression <- model@expectations$W$expression %>% as.data.frame() %>% rownames_to_column("id") %>% as_tibble() %>% janitor::clean_names()
loadings_morphology <- model@expectations$W$morphology %>% as.data.frame() %>% rownames_to_column("id") %>% as_tibble() %>% janitor::clean_names()
loadings_drug_activity <- model@expectations$W$drug_activity %>% as.data.frame() %>% rownames_to_column("id") %>% as_tibble() %>% janitor::clean_names()
loadings_mutation <- model@expectations$W$mutation %>% as.data.frame() %>% rownames_to_column("id") %>% as_tibble() %>% janitor::clean_names()

QC

plot_data_overview(model) + 
  ggsave(here::here("reports/figures/mofa/data_overview.pdf"))
## Saving 7 x 5 in image

df <- head(model@cache$variance_explained$r2_total[[1]]) %>%
  as.data.frame() %>%
  rownames_to_column("feature") %>%
  magrittr::set_colnames(c("feature", "var")) %>%
  mutate(var = var/100)

df %>%
  ggplot(aes(feature, var)) + 
  geom_bar(stat = "identity") + 
  coord_flip() + 
  theme_cowplot() + 
  labs(y = "R2") + 
  ggsave(here::here("reports/figures/mofa/var_explained_feature.pdf"))
## Saving 7 x 5 in image

Breakdown by factors

model@cache$variance_explained$r2_per_factor$single_group
##         drug_activity expression morphology_view  mutation     size_view
## Factor1      16.33660   14.65125        1.005191 13.997723  3.923018e+01
## Factor2      12.25793   13.05513       15.602844  8.586737  6.894016e-02
## Factor3      12.84796   10.37584        7.765737  3.413264 -2.116287e-06
model@cache$variance_explained$r2_per_factor$single_group %>% as.data.frame() %>% 
  rownames_to_column("factor") %>% 
  mutate(overall_variance = expression + morphology_view + size_view + drug_activity) %>%
  mutate(overall_variance = overall_variance/4) %>%
  ggplot(aes(factor, overall_variance)) + 
  geom_point(size = 2) + 
  theme_cowplot()

gg_ve <- plot_variance_explained(model, x="view", y="factor")
gg_ve + coord_flip() + 
  #coord_equal() +
  ggsave(here::here("reports/figures/mofa/var_explained.pdf"))
## Saving 7 x 5 in image

ph_cor <- weights %>% 
  as.data.frame() %>% 
  column_to_rownames("id") %>% 
  cor() %>% 
  pheatmap::pheatmap()

weights %>% 
  as.data.frame() %>% 
  column_to_rownames("id") %>% 
  cor() %>% as.vector() %>% 
  .[. != 1] %>% 
  summary()
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -0.114913 -0.084456  0.006913 -0.028706  0.018139  0.021880
plot_factor(model, 
  factor = 1:3
)

factor overview

umap_factor <- weights %>% 
  separate(id, c("line", "replicate"), sep = "_") %>% 
  mutate(line = paste0(line, "T01"),
         replicate = substr(replicate, 2,2)) %>% 
  left_join(umap_df %>% 
  filter(drug == "DMSO"), .) %>% 
  filter(line != "D020T02") 
## Joining, by = c("line", "replicate")
gg_factor_umap <- umap_factor %>% 
  dplyr::select(-size_factor) %>%
  pivot_longer(cols = contains("factor"), names_to = "number", values_to = "value") %>%
  ggplot(aes(v1, v2, color = value)) + 
  ggrastr::geom_point_rast(alpha = 0.1, size = 0.35) + 
  scale_color_viridis_c() +
  cowplot::theme_cowplot() +
  labs(x = "UMAP 1",
       y = "UMAP 2",
       color = "factor value") + 
  facet_wrap(~ number) +
  theme(legend.position = "bottom") + 
    coord_fixed()

gg_factor_umap + ggsave(here("reports/figures/mofa/factor_overview.pdf"), height = 6 , width = 6)

gg_f12 <- weights %>% 
  dplyr::rename(line = id) %>%
  left_join(organoid_size_fit %>% mutate(line = paste0(line, "_", rep))) %>%
  left_join(organoid_morphology %>% 
            mutate(line = substr(line, 1, 4)) %>% 
  expand_grid(., rep = c("r1", "r2")) %>% 
  mutate(sample = paste0(line, "_", rep)) %>% dplyr::select(-line, line = sample)) %>%
  distinct() %>%
  ggplot(aes(factor1, factor2, label = line, color = size, shape = morphology)) + 
  geom_point(size = 4) + 
  ggrepel::geom_text_repel(color = "black") + 
  scale_color_viridis_c() + 
  theme_cowplot() + 
  coord_fixed()
## Joining, by = "line"
## Joining, by = c("line", "rep")
gg_f12 + ggsave(here("reports/figures/mofa/factor_12_plot.pdf"), height = 6 , width = 6)

gg_f23 <- weights %>% 
  dplyr::rename(line = id) %>%
  left_join(organoid_size_fit %>% mutate(line = paste0(line, "_", rep))) %>%
  ggplot(aes(factor1, factor3, label = line)) + 
  geom_point(size = 2) + 
  ggrepel::geom_text_repel(color = "black") + 
  scale_color_viridis_c() + 
  theme_cowplot() + 
    theme_cowplot() + 
  coord_fixed() 
## Joining, by = "line"
gg_f23 + ggsave(here("reports/figures/mofa/factor_23_plot.pdf"), height = 5 , width = 5)

mutations

loadings_mutation %>%
  column_to_rownames("id") %>% 
  pheatmap::pheatmap()

df <- loadings_mutation %>% 
  gather(key = "factor", value = "weight", -id) 

df %>%
  ggplot(aes(factor, weight, label = id)) +
  geom_point() + 
  theme_cowplot() + 
  ggrepel::geom_text_repel(data = df %>% filter(id %in% c("NRAS", "ERBB2", "PTEN", "KRAS", "PIK3CA")))

drug response

## load drug annotation
drug_anno <- readxl::read_excel(here::here('references/layouts/Compound_Annotation_Libraries_New.xlsx')) %>% distinct(drug = `Compound name`, target = `Primary Target`)

loadings_drug_activity_tidy <- loadings_drug_activity %>% 
  mutate(drug = substr(id, 7, nchar(.))) %>% 
  dplyr::select(-id) %>% 
  arrange(desc(factor1)) %>% 
  left_join(drug_anno)
## Joining, by = "drug"
# 
# top_n <- 20
# 
# top_drugs <- rbind(
#   loadings_drug_activity_tidy %>% arrange(factor1) %>% head(top_n),
#   loadings_drug_activity_tidy %>% arrange(factor1) %>% tail(top_n),
#   loadings_drug_activity_tidy %>% arrange(factor2) %>% head(top_n),
#   loadings_drug_activity_tidy %>% arrange(factor2) %>% tail(top_n),
#   loadings_drug_activity_tidy %>% arrange(factor3) %>% head(top_n),
#   loadings_drug_activity_tidy %>% arrange(factor3) %>% tail(top_n)
# ) %>% distinct()
# 
# top_drugs %>% 
#   column_to_rownames("drug") %>% 
#   pheatmap::pheatmap()
df <- loadings_drug_activity_tidy %>% dplyr::select(-target) %>% 
  gather(factor_name, value, -drug) %>% 
  distinct() 

row_anno <-  df %>% left_join(loadings_drug_activity_tidy) %>% dplyr::select(drug, target) %>%
  mutate(id = paste0(drug, "_", target)) %>% distinct() %>% as.data.frame() %>% column_to_rownames("id") 
## Joining, by = "drug"
df %>%
  spread(drug, value) %>% 
  dplyr::select(-factor_name) %>%
  cor() %>% 
  as.matrix() %>% 
  pheatmap::pheatmap(show_rownames = FALSE, 
                     show_colnames = FALSE)

drug_activity_test <- loadings_drug_activity_tidy %>% semi_join(loadings_drug_activity_tidy %>% 
                                            dplyr::count(target) %>% 
                                            filter(n >=5)) 
## Joining, by = "target"
drug_activity_test_result <- rbind(
  lm(factor1 ~ target, data = drug_activity_test) %>% summary() %>% broom::tidy() %>% mutate(factor = "factor1"),
  lm(factor2 ~ target, data = drug_activity_test) %>% summary() %>% broom::tidy() %>% mutate(factor = "factor2"),
  lm(factor3 ~ target, data = drug_activity_test) %>% summary() %>% broom::tidy() %>% mutate(factor = "factor3")) 

df <- drug_activity_test_result %>% 
  filter(term != "(Intercept)") %>% 
  dplyr::select(term, statistic, factor) %>% 
  mutate(term = substr(term, 7, nchar(term)))

df %>% 
  spread(factor, statistic) %>% 
  column_to_rownames("term") %>% 
  pheatmap::pheatmap()

drug_activity_test_result %>% 
  mutate(fdr = p.adjust(p.value, method = "BH")) %>% 
  filter(fdr <= 0.2) %>% 
  filter(term != "(Intercept)") %>% 
  arrange(factor, statistic)
## # A tibble: 6 x 7
##   term               estimate std.error statistic      p.value factor        fdr
##   <chr>                 <dbl>     <dbl>     <dbl>        <dbl> <chr>       <dbl>
## 1 targetEGFR           -0.522    0.102      -5.11      8.39e-7 facto…    2.64e-5
## 2 targetMEK            -0.269    0.0956     -2.81      5.50e-3 facto…    6.93e-2
## 3 targetSrc             0.250    0.103       2.42      1.66e-2 facto…    1.70e-1
## 4 targetWnt/beta-ca…    0.353    0.117       3.02      2.94e-3 facto…    4.64e-2
## 5 targetEGFR            0.275    0.0894      3.08      2.44e-3 facto…    4.64e-2
## 6 targetAurora Kina…    0.574    0.0894      6.42      1.28e-9 facto…    8.09e-8

exporting factors loadings for enrichment

# top 100 genes
loadings_expression %>% arrange(desc(factor1)) %>% head(100) %>% 
  dplyr::mutate(id = substr(id, 1, nchar(id)-11)) %>% 
  arrange(desc(id)) %>%
  dplyr::select(id) %>% write_csv(here::here("reports/tables/factor1_top.csv"))

loadings_expression %>% arrange(desc(factor2)) %>% head(100) %>% 
  dplyr::mutate(id = substr(id, 1, nchar(id)-11)) %>% 
  dplyr::select(id) %>% write_csv(here::here("reports/tables/factor2_top.csv"))

loadings_expression %>% arrange(desc(factor3)) %>% head(100) %>% 
  dplyr::mutate(id = substr(id, 1, nchar(id)-11)) %>% 
  dplyr::select(id) %>% write_csv(here::here("reports/tables/factor3_top.csv"))

# all genes
loadings_expression %>% arrange(desc(factor1)) %>% 
  dplyr::mutate(id = substr(id, 1, nchar(id)-11)) %>% 
  arrange(desc(id)) %>%
  dplyr::select(id, factor1) %>% write_csv(here::here("reports/tables/factor1_all.csv"))

loadings_expression %>% arrange(desc(factor2)) %>% 
  dplyr::mutate(id = substr(id, 1, nchar(id)-11)) %>% 
  dplyr::select(id, factor2) %>% write_csv(here::here("reports/tables/factor2_all.csv"))

loadings_expression %>% arrange(desc(factor3)) %>% 
  dplyr::mutate(id = substr(id, 1, nchar(id)-11)) %>% 
  dplyr::select(id, factor3) %>% write_csv(here::here("reports/tables/factor3_all.csv"))

factor 1

size

weights %>% 
  dplyr::rename(line = id) %>%
  left_join(organoid_size_fit %>% mutate(line = paste0(line, "_", rep))) %>%
  left_join(organoid_morphology %>% 
            mutate(line = substr(line, 1, 4)) %>% 
  expand_grid(., rep = c("r1", "r2")) %>% 
  mutate(sample = paste0(line, "_", rep)) %>% dplyr::select(-line, line = sample)) %>%
  distinct() %>%
  ggplot(aes(factor1, size, label = line)) + 
  geom_smooth(method = "lm", se = FALSE) +
  geom_point(size = 4) + 
  ggrepel::geom_text_repel(color = "black") + 
  scale_color_viridis_c() + 
  labs(y = "expected size") +
  theme_cowplot()
## Joining, by = "line"
## Joining, by = c("line", "rep")
## `geom_smooth()` using formula 'y ~ x'

gene expression

plot_weights(model,
  view = "expression",
  factor = 1,
  nfeatures = 10,     # Number of features to highlight
  scale = T,          # Scale weights from -1 to 1
  abs = F             # Take the absolute value?
)
## Warning: Ignoring unknown parameters: max.overlaps

growth control via IGF signaling cell adhesion via FYN signaling, high fibronectin expression DLX- TGFb and BMP inhibiting transcription factor

df <- loadings_expression %>% 
  separate(id, c("symbol"), sep = "_exp") %>% 
  filter(symbol != "") %>%
  left_join(promise_long_filtered_top %>% dplyr::select(symbol, entrez) %>% distinct()) %>%
  arrange(desc(factor1)) %>%
  mutate(order = nrow(.):1)
## Warning: Expected 1 pieces. Additional pieces discarded in 3222 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining, by = "symbol"
df %>% 
  ggplot(aes(order, factor1)) + 
  geom_point()+ 
  geom_point(data = df %>% filter(symbol %in% c("H19", "IGF2")), 
             color = "blue") + 
  theme_cowplot()

df <- loadings_expression %>% 
  separate(id, c("symbol"), sep = "_exp") %>% 
  filter(symbol != "") %>%
  left_join(promise_long_filtered_top %>% dplyr::select(symbol, entrez) %>% distinct()) %>%
  arrange(desc(factor1))
## Warning: Expected 1 pieces. Additional pieces discarded in 3222 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining, by = "symbol"
ranks_1 <- setNames(df$factor1, as.character(df$entrez))
ranks_1 <- sort(ranks_1, decreasing = T)

## Reactome enrichment analysis
gse_reactome_1 <- gsePathway(
  geneList = ranks_1,
  organism = 'human',
  #nPerm = 1e5,
  #minGSSize = 100,
  #maxGSSize = 500,
  pvalueCutoff = 0.2
)
## Loading required package: org.Hs.eg.db
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:clusterProfiler':
## 
##     rename
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:tidyr':
## 
##     expand
## The following object is masked from 'package:base':
## 
##     expand.grid
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:clusterProfiler':
## 
##     slice
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## The following object is masked from 'package:purrr':
## 
##     reduce
## 
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:clusterProfiler':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.
## leading edge analysis...
## done...
reactome <- pairwise_termsim(gse_reactome_1) 
emapplot(reactome, color = "NES")

gse_go <- gseGO(
  geneList = ranks_1,
  OrgDb = org.Hs.eg.db,
  ont = 'BP',
  # nPerm = 1e5,
  # minGSSize = 10,
  # maxGSSize = 500,
  pvalueCutoff = 0.2
)

go <- pairwise_termsim(gse_go) 
emapplot(go, color = "NES")

suarez signature

# run gsea with clusterprofiler
df = loadings_expression %>% drop_na() %>%
  mutate(symbol = substr(id, 1, nchar(id)-11)) %>%
  dplyr::select(-id) %>%
  left_join(promise_long_filtered_top) %>%
  dplyr::select(factor1, symbol) %>% distinct() %>%
  arrange(desc(factor1))
## Joining, by = "symbol"
ranks_symbol = setNames(df$factor1, as.character(df$symbol))

gse_sig <- GSEA(
  geneList = ranks_symbol,
  TERM2GENE = intestinal_sig,
  nPerm = 1e5,
  minGSSize = 1,
  maxGSSize = 1000,
  pvalueCutoff = 1
)
## preparing geneSet collections...
## GSEA analysis...
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## leading edge analysis...
## done...
## output as tibble
gse_sig_tbl <- as_tibble(gse_sig)
## proliferation
gseaplot2(
  gse_sig, geneSetID = gse_sig$ID[1],
  title = paste0(gse_sig$ID[1], 
                ' (p = ', round(gse_sig_tbl$pvalue[1], 3), 
                '; NES = ', round(gse_sig_tbl$NES[1], 2), ')')
)

## late TA signature
gseaplot2(
  gse_sig, geneSetID = gse_sig$ID[2], 
  title = paste0(gse_sig$ID[2], 
                ' (p = ', round(gse_sig_tbl$pvalue[2], 3), 
                '; NES = ', round(gse_sig_tbl$NES[2], 2), ')')
) 

## LGR5
gseaplot2(
  gse_sig, geneSetID = gse_sig$ID[3],
  title = paste0(gse_sig$ID[3], 
                ' (p = ', round(gse_sig_tbl$pvalue[3], 3), 
                '; NES = ', round(gse_sig_tbl$NES[3], 2), ')')
)

## EPH
gseaplot2(
  gse_sig, geneSetID = gse_sig$ID[4],
  title = paste0(gse_sig$ID[4], 
                ' (p = ', round(gse_sig_tbl$pvalue[1], 3), 
                '; NES = ', round(gse_sig_tbl$NES[1], 2), ')')
)

CRIS signature

# run gsea with clusterprofiler
df = loadings_expression %>% drop_na() %>%
  mutate(symbol = substr(id, 1, nchar(id)-11)) %>%
  dplyr::select(-id) %>%
  left_join(promise_long_filtered_top) %>%
  dplyr::select(factor1, symbol) %>% distinct() %>%
  arrange(desc(factor1))
## Joining, by = "symbol"
ranks_symbol = setNames(df$factor1, as.character(df$symbol))

gse_cris <- GSEA(
  geneList = ranks_symbol,
  TERM2GENE = cris_sig %>% dplyr::select(signature, symbol),
  nPerm = 1e5,
  minGSSize = 1,
  maxGSSize = 1000,
  pvalueCutoff = 1
)
## preparing geneSet collections...
## GSEA analysis...
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## leading edge analysis...
## done...
## output as tibble
gse_cris_tbl <- as_tibble(gse_cris)

gse_cris_tbl
## # A tibble: 5 x 11
##   ID    Description setSize enrichmentScore    NES  pvalue p.adjust qvalues
##   <chr> <chr>         <int>           <dbl>  <dbl>   <dbl>    <dbl> <lgl>  
## 1 CRIS… CRIS-D           41           0.621  2.17  2.26e-5 0.000113 NA     
## 2 CRIS… CRIS-C           75          -0.444 -1.68  1.27e-3 0.00317  NA     
## 3 CRIS… CRIS-A           88          -0.399 -1.55  5.25e-3 0.00875  NA     
## 4 CRIS… CRIS-E           48          -0.351 -1.22  1.56e-1 0.194    NA     
## 5 CRIS… CRIS-B           40           0.249  0.866 7.03e-1 0.703    NA     
## # … with 3 more variables: rank <dbl>, leading_edge <chr>,
## #   core_enrichment <chr>
## proliferation
gseaplot2(
  gse_cris, geneSetID = gse_cris$ID[1],
  title = paste0(gse_cris$ID[1], 
                ' (p = ', round(gse_cris_tbl$pvalue[1], 3), 
                '; NES = ', round(gse_cris_tbl$NES[1], 2), ')')
)

gseaplot2(
  gse_cris, geneSetID = gse_cris$ID[2],
  title = paste0(gse_cris$ID[2], 
                ' (p = ', round(gse_cris_tbl$pvalue[2], 3), 
                '; NES = ', round(gse_cris_tbl$NES[2], 2), ')')
)

drug sensitivity

aggregate_drugs <- drug_activity_test %>% group_by(target) %>% summarise_at(vars(contains("factor")), funs("median"))
## Warning: `funs()` is deprecated as of dplyr 0.8.0.
## Please use a list of either functions or lambdas: 
## 
##   # Simple named list: 
##   list(mean = mean, median = median)
## 
##   # Auto named with `tibble::lst()`: 
##   tibble::lst(mean, median)
## 
##   # Using lambdas
##   list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
include_drugs <- aggregate_drugs %>% arrange(desc(factor1)) %>% .$target

drug_activity_test %>%
  drop_na() %>%
  mutate(target = factor(target, levels = include_drugs)) %>%
  ggplot(aes(y = target, x = factor1)) + 
  #geom_point() + 
  ggridges::geom_density_ridges() +
  geom_hline(yintercept = 0) + 
  #coord_flip() + 
  cowplot::theme_cowplot()
## Picking joint bandwidth of 0.11

median weighting within the factor was strongest for activity of MEK, IGF1R inhibitors. mTOR inhibitors ranked among the strongest drugs as well. In contrast, EGFR inhibitor activity had the most negative median contribution to the factor. Put differently, organoid lines that had a high score for factor 1 tended to be sensitive to MEK and IGF-1R inhibitors and less responsive to EGFR and Syk inhibitors.

EGFR inhibitor activity has a significant negative contribution to the factor. This means that lines with a high factor1 score, show little to no activity when treated with EGFR inhibitors.

drug_activity_joined <- weights %>%
  mutate(id = substr(id, 1, 4)) %>% 
  group_by(id) %>% 
 summarise_all(funs(mean)) %>% 
  mutate(line = paste0(id, "T01")) %>%
  left_join(drug_activity)
## Joining, by = "line"
doi <- drug_activity_test %>% 
  dplyr::filter(target == "EGFR") %>% 
  .$drug 


drug_activity_joined %>% 
  filter(drug %in% doi) %>% 
  ggplot(aes(factor1, auroc)) +
  geom_point() + 
  cowplot::theme_cowplot() + 
  geom_smooth(method = "lm", se = FALSE, color = "red") + 
  facet_wrap(~ drug)
## `geom_smooth()` using formula 'y ~ x'

drug_activity_joined <- weights %>%
  mutate(id = substr(id, 1, 4)) %>% 
  group_by(id) %>% 
 summarise_all(funs(mean)) %>% 
  mutate(line = paste0(id, "T01")) %>%
  left_join(drug_activity)
## Joining, by = "line"
doi <- drug_activity_test %>% 
  dplyr::filter(target == "MEK") %>% 
  .$drug 


drug_activity_joined %>% 
  filter(drug %in% doi) %>% 
  ggplot(aes(factor1, auroc)) +
  geom_point() + 
  cowplot::theme_cowplot() + 
  geom_smooth(method = "lm", se = FALSE) + 
  facet_wrap(~ drug)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

drug_activity_joined <- weights %>%
  mutate(id = substr(id, 1, 4)) %>% 
  group_by(id) %>% 
 summarise_all(funs(mean)) %>% 
  mutate(line = paste0(id, "T01")) %>%
  left_join(drug_activity)
## Joining, by = "line"
doi <- drug_activity_test %>% 
  dplyr::filter(target == "IGF-1R") %>% 
  .$drug 


drug_activity_joined %>% 
  filter(drug %in% doi) %>% 
  ggplot(aes(factor1, auroc)) +
  geom_point() + 
  cowplot::theme_cowplot() + 
  geom_smooth(method = "lm", se = FALSE, color = "black") + 
  facet_wrap(~ drug)
## `geom_smooth()` using formula 'y ~ x'

drug_activity_joined <- weights %>%
  mutate(id = substr(id, 1, 4)) %>% 
  group_by(id) %>% 
 summarise_all(funs(mean)) %>% 
  mutate(line = paste0(id, "T01")) %>%
  left_join(drug_activity)
## Joining, by = "line"
doi <- drug_activity_test %>% 
  dplyr::filter(target == "mTOR") %>% 
  .$drug 


drug_activity_joined %>% 
  filter(drug %in% doi) %>% 
  ggplot(aes(factor1, auroc)) +
  geom_point() + 
  cowplot::theme_cowplot() + 
  geom_smooth(method = "lm", se = FALSE, color = "green") + 
  facet_wrap(~ drug)
## `geom_smooth()` using formula 'y ~ x'

within the group of mTOR inhibitors, activity towards treatment with the small molecule WYE-132 had the strongest contribution to the factor. WYE-132 is an ATP competitive inhibitor of mTORC1 and mTORC2

Next we wondered how treatment with members from these highly active groups would change the phenotype of organoids

mutation

df <- mofa_genetics %>% 
  mutate(sample = substr(sample, 1, 4)) %>% 
  distinct() %>%
  filter(feature %in% c("NRAS"))

drug_activity_joined %>%
  dplyr::select(-line, sample = id) %>% 
  filter(drug %in% c("Selumetinib (AZD6244)")) %>%
  left_join(df) %>% 
  drop_na() %>%
  mutate(value = if_else(value == 0, "WT", "mut")) %>%
  ggplot(aes(value, auroc)) + 
  geom_point() + 
  ggsignif::geom_signif(comparisons = list(c("WT", "mut"))) + 
  theme_cowplot() + 
  facet_grid(drug ~ feature)
## Joining, by = "sample"

factor 2

morphology

weights %>% 
  dplyr::rename(line = id) %>%
  left_join(organoid_morphology %>% 
              mutate(line = substr(line, 1, 4)) %>% 
    expand_grid(., rep = c("r1", "r2")) %>% 
      mutate(sample = paste0(line, "_", rep)) %>% dplyr::select(-line, line = sample)) %>%
  distinct() %>%
  ggplot(aes(factor1, factor2, label = line, color = morphology)) + 
  geom_point(size = 4) + 
  ggrepel::geom_text_repel(color = "black") + 
  theme_cowplot() + 
  scale_color_brewer(type = "qual")
## Joining, by = "line"

gene expression

plot_weights(model,
  view = "expression",
  factor = 2,
  nfeatures = 10,     # Number of features to highlight
  scale = T,          # Scale weights from -1 to 1
  abs = F             # Take the absolute value?
)
## Warning: Ignoring unknown parameters: max.overlaps

growth control via IGF signaling cell adhesion via FYN signaling, high fibronectin expression DLX- TGFb and BMP inhibiting transcription factor

reactome and GO

df <- loadings_expression %>% 
  separate(id, c("symbol"), sep = "_exp") %>% 
  filter(symbol != "") %>%
  left_join(promise_long_filtered_top %>% dplyr::select(symbol, entrez) %>% distinct()) %>%
  arrange(desc(factor2))
## Warning: Expected 1 pieces. Additional pieces discarded in 3222 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining, by = "symbol"
ranks_2 <- setNames(df$factor2, as.character(df$entrez))
ranks_2 <- sort(ranks_2, decreasing = T)

## Reactome enrichment analysis
gse_reactome_2 <- gsePathway(
  geneList = ranks_2,
  organism = 'human',
  #nPerm = 1e5,
  #minGSSize = 100,
  #maxGSSize = 500,
  pvalueCutoff = 0.2
)
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.
## leading edge analysis...
## done...
reactome <- pairwise_termsim(gse_reactome_2) 
emapplot(reactome, color = "NES")

gse_go <- gseGO(
  geneList = ranks_2,
  OrgDb = org.Hs.eg.db,
  ont = 'BP',
  # nPerm = 1e5,
  # minGSSize = 10,
  # maxGSSize = 500,
  pvalueCutoff = 0.2
)

go <- pairwise_termsim(gse_go) 
emapplot(go, color = "NES")

suarez signature

# run gsea with clusterprofiler
df = loadings_expression %>% drop_na() %>%
  mutate(symbol = substr(id, 1, nchar(id)-11)) %>%
  dplyr::select(-id) %>%
  left_join(promise_long_filtered_top) %>%
  dplyr::select(factor2, symbol) %>% distinct() %>%
  arrange(desc(factor2))
## Joining, by = "symbol"
ranks_symbol = setNames(df$factor2, as.character(df$symbol))

gse_sig <- GSEA(
  geneList = ranks_symbol,
  TERM2GENE = intestinal_sig,
  nPerm = 1e5,
  minGSSize = 1,
  maxGSSize = 1000,
  pvalueCutoff = 1
)
## preparing geneSet collections...
## GSEA analysis...
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## leading edge analysis...
## done...
## output as tibble
gse_sig_tbl <- as_tibble(gse_sig)
## proliferation
gseaplot2(
  gse_sig, geneSetID = gse_sig$ID[1],
  title = paste0(gse_sig$ID[1], 
                ' (p = ', round(gse_sig_tbl$pvalue[1], 3), 
                '; NES = ', round(gse_sig_tbl$NES[1], 2), ')')
)

## lgr5 signature
gseaplot2(
  gse_sig, geneSetID = gse_sig$ID[2], 
  title = paste0(gse_sig$ID[2], 
                ' (p = ', round(gse_sig_tbl$pvalue[2], 3), 
                '; NES = ', round(gse_sig_tbl$NES[2], 2), ')')
) 

## proliferation
gseaplot2(
  gse_sig, geneSetID = gse_sig$ID[3],
  title = paste0(gse_sig$ID[3], 
                ' (p = ', round(gse_sig_tbl$pvalue[3], 3), 
                '; NES = ', round(gse_sig_tbl$NES[3], 2), ')')
)

## proliferation
gseaplot2(
  gse_sig, geneSetID = gse_sig$ID[4],
  title = paste0(gse_sig$ID[4], 
                ' (p = ', round(gse_sig_tbl$pvalue[4], 3), 
                '; NES = ', round(gse_sig_tbl$NES[4], 2), ')')
)

CRIS signature

# run gsea with clusterprofiler
df = loadings_expression %>% drop_na() %>%
  mutate(symbol = substr(id, 1, nchar(id)-11)) %>%
  dplyr::select(-id) %>%
  left_join(promise_long_filtered_top) %>%
  dplyr::select(factor2, symbol) %>% distinct() %>%
  arrange(desc(factor2))
## Joining, by = "symbol"
ranks_symbol = setNames(df$factor2, as.character(df$symbol))

gse_cris <- GSEA(
  geneList = ranks_symbol,
  TERM2GENE = cris_sig %>% dplyr::select(signature, symbol),
  nPerm = 1e5,
  minGSSize = 1,
  maxGSSize = 1000,
  pvalueCutoff = 1
)
## preparing geneSet collections...
## GSEA analysis...
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## leading edge analysis...
## done...
## output as tibble
gse_cris_tbl <- as_tibble(gse_cris)

gse_cris_tbl
## # A tibble: 5 x 11
##   ID    Description setSize enrichmentScore    NES  pvalue p.adjust qvalues
##   <chr> <chr>         <int>           <dbl>  <dbl>   <dbl>    <dbl> <lgl>  
## 1 CRIS… CRIS-E           48          -0.657 -2.54  4.38e-5 0.000154 NA     
## 2 CRIS… CRIS-A           88          -0.429 -1.87  6.15e-5 0.000154 NA     
## 3 CRIS… CRIS-C           75          -0.362 -1.53  5.44e-3 0.00907  NA     
## 4 CRIS… CRIS-D           41           0.456  1.44  3.90e-2 0.0487   NA     
## 5 CRIS… CRIS-B           40          -0.251 -0.934 5.79e-1 0.579    NA     
## # … with 3 more variables: rank <dbl>, leading_edge <chr>,
## #   core_enrichment <chr>
## proliferation
gseaplot2(
  gse_cris, geneSetID = gse_cris$ID[3],
  title = paste0(gse_cris$ID[3], 
                ' (p = ', round(gse_cris_tbl$pvalue[3], 5), 
                '; NES = ', round(gse_cris_tbl$NES[3], 2), ')')
)

## proliferation
gseaplot2(
  gse_cris, geneSetID = gse_cris$ID[1],
  title = paste0(gse_cris$ID[1], 
                ' (p = ', round(gse_cris_tbl$pvalue[1], 5), 
                '; NES = ', round(gse_cris_tbl$NES[1], 2), ')')
)

gseaplot2(
  gse_cris, geneSetID = gse_cris$ID[4],
  title = paste0(gse_cris$ID[4], 
                ' (p = ', round(gse_cris_tbl$pvalue[4], 5), 
                '; NES = ', round(gse_cris_tbl$NES[4], 2), ')')
)

drug sensitivity

aggregate_drugs <- drug_activity_test %>% group_by(target) %>% summarise_at(vars(contains("factor")), funs("median"))

include_drugs <- aggregate_drugs %>% arrange(desc(factor2)) %>% .$target

drug_activity_test %>%
  drop_na() %>%
  mutate(target = factor(target, levels = include_drugs)) %>%
  ggplot(aes(y = target, x = factor2)) + 
  #geom_point() + 
  ggridges::geom_density_ridges() +
  geom_hline(yintercept = 0) + 
  #coord_flip() + 
  cowplot::theme_cowplot()
## Picking joint bandwidth of 0.104

median weighting within the factor was strongest for activity of Wnt/bcatenin, Src, EGFR and FAK inhibitors. In contrast, MEK, ERK inhibitor activity had among the most negative median contribution to the factor, similarly mTOR inhibitors, albeit not statistically significant. Put differently, organoid lines that had a high score for factor 2 tended to be sensitive to Wnt and EGFR targeting and less responsive to MEK and ERK inhibitors.

drug_activity_joined <- weights %>%
  mutate(id = substr(id, 1, 4)) %>% 
  group_by(id) %>% 
 summarise_all(funs(mean)) %>% 
  mutate(line = paste0(id, "T01")) %>%
  left_join(drug_activity)
## Joining, by = "line"
doi <- drug_activity_test %>% 
  dplyr::filter(target == "EGFR") %>% 
  .$drug 


drug_activity_joined %>% 
  filter(drug %in% doi) %>% 
  ggplot(aes(factor2, auroc)) +
  geom_point() + 
  cowplot::theme_cowplot() + 
  geom_smooth(method = "lm", se = FALSE, color = "red") + 
  facet_wrap(~ drug)
## `geom_smooth()` using formula 'y ~ x'

drug_activity_joined <- weights %>%
  mutate(id = substr(id, 1, 4)) %>% 
  group_by(id) %>% 
 summarise_all(funs(mean)) %>% 
  mutate(line = paste0(id, "T01")) %>%
  left_join(drug_activity)
## Joining, by = "line"
doi <- drug_activity_test %>% 
  dplyr::filter(target == "MEK") %>% 
  .$drug 


drug_activity_joined %>% 
  filter(drug %in% doi) %>% 
  ggplot(aes(factor2, auroc)) +
  geom_point() + 
  cowplot::theme_cowplot() + 
  geom_smooth(method = "lm", se = FALSE) + 
  facet_wrap(~ drug)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

doi <- drug_activity_test %>% 
  dplyr::filter(target == "Wnt/beta-catenin") %>% 
  .$drug 


drug_activity_joined %>% 
  filter(drug %in% doi) %>% 
  ggplot(aes(factor2, auroc)) +
  geom_point() + 
  cowplot::theme_cowplot() + 
  geom_smooth(method = "lm", se = FALSE, color = "black") + 
  facet_wrap(~ drug)
## `geom_smooth()` using formula 'y ~ x'

drug_activity_joined <- weights %>%
  mutate(id = substr(id, 1, 4)) %>% 
  group_by(id) %>% 
 summarise_all(funs(mean)) %>% 
  mutate(line = paste0(id, "T01")) %>%
  left_join(drug_activity)
## Joining, by = "line"
doi <- drug_activity_test %>% 
  dplyr::filter(target == "mTOR") %>% 
  .$drug 


drug_activity_joined %>% 
  filter(drug %in% doi) %>% 
  ggplot(aes(factor2, auroc)) +
  geom_point() + 
  cowplot::theme_cowplot() + 
  geom_smooth(method = "lm", se = FALSE, color = "green") + 
  facet_wrap(~ drug)
## `geom_smooth()` using formula 'y ~ x'

again, within the group of mTOR inhibitors, activity towards treatment with the small molecule WYE-132 had the strongest contribution to the factor - showing less activity in factor 2 high organoids compared to factor 2 low organoid lines.

factor 3

morphology

gene expression

plot_weights(model,
  view = "expression",
  factor = 3,
  nfeatures = 10,     # Number of features to highlight
  scale = T,          # Scale weights from -1 to 1
  abs = F             # Take the absolute value?
)
## Warning: Ignoring unknown parameters: max.overlaps

NRX cell adhesion xenobiotic metabolism

reactome and GO

df <- loadings_expression %>% 
  separate(id, c("symbol"), sep = "_exp") %>% 
  filter(symbol != "") %>%
  left_join(promise_long_filtered_top %>% dplyr::select(symbol, entrez) %>% distinct()) %>%
  arrange(desc(factor3))
## Warning: Expected 1 pieces. Additional pieces discarded in 3222 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
## Joining, by = "symbol"
ranks_3 <- setNames(df$factor3, as.character(df$entrez))
ranks_3 <- sort(ranks_3, decreasing = T)

## Reactome enrichment analysis
gse_reactome_3 <- gsePathway(
  geneList = ranks_3,
  organism = 'human',
  #nPerm = 1e5,
  #minGSSize = 100,
  #maxGSSize = 500,
  pvalueCutoff = 0.2
)
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize,
## gseaParam, : There are duplicate gene names, fgsea may produce unexpected
## results.
## leading edge analysis...
## done...
reactome <- pairwise_termsim(gse_reactome_3) 
emapplot(reactome, color = "NES")

gse_go <- gseGO(
  geneList = ranks_3,
  OrgDb = org.Hs.eg.db,
  ont = 'BP',
  # nPerm = 1e5,
  # minGSSize = 10,
  # maxGSSize = 500,
  pvalueCutoff = 0.2
)

go <- pairwise_termsim(gse_go) 
emapplot(go, color = "NES")

suarez signature

# run gsea with clusterprofiler
df = loadings_expression %>% drop_na() %>%
  mutate(symbol = substr(id, 1, nchar(id)-11)) %>%
  dplyr::select(-id) %>%
  left_join(promise_long_filtered_top) %>%
  dplyr::select(factor3, symbol) %>% distinct() %>%
  arrange(desc(factor3))
## Joining, by = "symbol"
ranks_symbol = setNames(df$factor3, as.character(df$symbol))

gse_sig <- GSEA(
  geneList = ranks_symbol,
  TERM2GENE = intestinal_sig,
  nPerm = 1e5,
  minGSSize = 1,
  maxGSSize = 1000,
  pvalueCutoff = 1
)
## preparing geneSet collections...
## GSEA analysis...
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## leading edge analysis...
## done...
## output as tibble
gse_sig_tbl <- as_tibble(gse_sig)
## proliferation
gseaplot2(
  gse_sig, geneSetID = gse_sig$ID[1],
  title = paste0(gse_sig$ID[1], 
                ' (p = ', round(gse_sig_tbl$pvalue[1], 3), 
                '; NES = ', round(gse_sig_tbl$NES[1], 2), ')')
)

## lgr5 signature
gseaplot2(
  gse_sig, geneSetID = gse_sig$ID[2], 
  title = paste0(gse_sig$ID[2], 
                ' (p = ', round(gse_sig_tbl$pvalue[2], 3), 
                '; NES = ', round(gse_sig_tbl$NES[2], 2), ')')
) 

## proliferation
gseaplot2(
  gse_sig, geneSetID = gse_sig$ID[3],
  title = paste0(gse_sig$ID[3], 
                ' (p = ', round(gse_sig_tbl$pvalue[3], 3), 
                '; NES = ', round(gse_sig_tbl$NES[3], 2), ')')
)

## proliferation
gseaplot2(
  gse_sig, geneSetID = gse_sig$ID[4],
  title = paste0(gse_sig$ID[4], 
                ' (p = ', round(gse_sig_tbl$pvalue[4], 3), 
                '; NES = ', round(gse_sig_tbl$NES[4], 2), ')')
)

CRIS signature

set.seed(1234)

# run gsea with clusterprofiler
df = loadings_expression %>% drop_na() %>%
  mutate(symbol = substr(id, 1, nchar(id)-11)) %>%
  dplyr::select(-id) %>%
  left_join(promise_long_filtered_top) %>%
  dplyr::select(factor3, symbol) %>% distinct() %>%
  arrange(desc(factor3))
## Joining, by = "symbol"
ranks_symbol = setNames(df$factor3, as.character(df$symbol))

gse_cris <- GSEA(
  geneList = ranks_symbol,
  TERM2GENE = cris_sig %>% dplyr::select(signature, symbol),
  nPerm = 1e5,
  minGSSize = 1,
  maxGSSize = 1000,
  pvalueCutoff = 1
)
## preparing geneSet collections...
## GSEA analysis...
## Warning in .GSEA(geneList = geneList, exponent = exponent, minGSSize =
## minGSSize, : We do not recommend using nPerm parameter incurrent and future
## releases
## Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize
## = minGSSize, : You are trying to run fgseaSimple. It is recommended to use
## fgseaMultilevel. To run fgseaMultilevel, you need to remove the nperm argument
## in the fgsea function call.
## leading edge analysis...
## done...
## output as tibble
gse_cris_tbl <- as_tibble(gse_cris)

gse_cris_tbl
## # A tibble: 5 x 11
##   ID    Description setSize enrichmentScore   NES  pvalue p.adjust qvalues  rank
##   <chr> <chr>         <int>           <dbl> <dbl>   <dbl>    <dbl> <lgl>   <dbl>
## 1 CRIS… CRIS-B           40          -0.650 -2.02 6.30e-5 0.000315 NA        246
## 2 CRIS… CRIS-A           88          -0.475 -1.70 3.27e-4 0.000818 NA        499
## 3 CRIS… CRIS-E           48           0.500  1.43 3.40e-2 0.0533   NA        708
## 4 CRIS… CRIS-C           75           0.446  1.36 4.26e-2 0.0533   NA        528
## 5 CRIS… CRIS-D           41           0.484  1.35 7.24e-2 0.0724   NA        564
## # … with 2 more variables: leading_edge <chr>, core_enrichment <chr>
## proliferation
gseaplot2(
  gse_cris, geneSetID = gse_cris$ID[1],
  title = paste0(gse_cris$ID[1], 
                ' (p = ', round(gse_cris_tbl$pvalue[1], 3), 
                '; NES = ', round(gse_cris_tbl$NES[1], 2), ')')
)

gseaplot2(
  gse_cris, geneSetID = gse_cris$ID[2],
  title = paste0(gse_cris$ID[2], 
                ' (p = ', round(gse_cris_tbl$pvalue[2], 3), 
                '; NES = ', round(gse_cris_tbl$NES[2], 2), ')')
)

drug sensitivity

aggregate_drugs <- drug_activity_test %>% group_by(target) %>% summarise_at(vars(contains("factor")), funs("median"))

include_drugs <- aggregate_drugs %>% arrange(desc(factor3)) %>% .$target

drug_activity_test %>%
  drop_na() %>%
  mutate(target = factor(target, levels = include_drugs)) %>%
  ggplot(aes(y = target, x = factor3)) + 
  geom_vline(xintercept = 0) + 
  #geom_point() + 
  ggridges::geom_density_ridges() +
  
  #coord_flip() + 
  cowplot::theme_cowplot()
## Picking joint bandwidth of 0.0945

median weighting within the factor was strongest for activity of AURK inhibitors. In contrast, GSK and EGFR inhibitor activity had the most negative median contribution to the factor. Put differently, organoid lines that had a high score for factor 3 tended to be sensitive to ARK targeting and less responsive to GSK and EGFR inhibitors.

doi <- drug_activity_test %>% 
  dplyr::filter(target == "EGFR") %>% 
  .$drug 


drug_activity_joined %>% 
  filter(drug %in% doi) %>% 
  ggplot(aes(factor3, auroc)) +
  geom_point() + 
  cowplot::theme_cowplot() + 
  geom_smooth(method = "lm", se = FALSE, color = "red") + 
  facet_wrap(~ drug)
## `geom_smooth()` using formula 'y ~ x'

doi <- drug_activity_test %>% 
  dplyr::filter(target == "GSK-3") %>% 
  .$drug 


drug_activity_joined %>% 
  filter(drug %in% doi) %>% 
  ggplot(aes(factor3, auroc)) +
  geom_point() + 
  cowplot::theme_cowplot() + 
  geom_smooth(method = "lm", se = FALSE) + 
  facet_wrap(~ drug)
## `geom_smooth()` using formula 'y ~ x'

doi <- drug_activity_test %>% 
  dplyr::filter(target == "Aurora Kinase") %>% 
  .$drug 


drug_activity_joined %>% 
  filter(drug %in% doi) %>% 
  ggplot(aes(factor3, auroc)) +
  geom_point() + 
  cowplot::theme_cowplot() + 
  geom_smooth(method = "lm", se = FALSE, color = "black") + 
  facet_wrap(~ drug)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

again, within the group of mTOR inhibitors, activity towards treatment with the small molecule WYE-132 had the strongest contribution to the factor - showing less activity in factor 2 high organoids compared to factor 2 low organoid lines.